The most recent update of this html document occurred: Wed Nov 30 11:01:04 2016
> library(knitr)
> library(ggplot2)
> library(reshape)
> library(DESeq2)
> library(CHBUtils)
> library(limma)
> library(gtools)
> library(gridExtra)
> library(devtools)
> library(dplyr)
> library(tidyr)
> library(pheatmap)
> library(rio)> exp1 = import("Experiment1_Data.xlsx")
> exp1$Gene_Name = gsub("\\'", "", exp1$Gene_Name)
> colnames(exp1) = gsub("\\+", "p", colnames(exp1))
> exp2 = import("Experiment2_Data.xlsx")
> exp2$Gene_Name = gsub("\\'", "", exp2$Gene_Name)
> colnames(exp2) = gsub("\\+", "p", colnames(exp2))
>
> data = inner_join(exp1[, 1:11] %>% filter(Gene_Name != ""), exp2[, 1:11] %>%
+ filter(Gene_Name != ""), by = "Gene_Name") %>% select(Gene_Name, Entrez_Gene_ID = Entrez_Gene_ID.x,
+ Description = Description.x, d1_nt = nt.x, d1_sh17 = sh17.x, d1_sh16 = sh16.x,
+ d1_ntp = ntp.x, d1_sh17p = sh17p.x, d1_sh16p = sh16p.x, d2_nt = nt.y, d2_sh17 = sh17.y,
+ d2_sh16 = sh16.y, d2_ntp = ntp.y, d2_sh17p = sh17p.y, d2_sh16p = sh16p.y)
>
> counts = data[, 4:15]
> row.names(counts) = data$Gene_Name
>
>
> metadata = rbind(data.frame(donor = "donor1", group = colnames(exp1)[3:8]) %>%
+ mutate(name = paste0("d1_", group)), data.frame(donor = "donor2", group = colnames(exp2)[3:8]) %>%
+ mutate(name = paste0("d2_", group))) %>% mutate(treat = ifelse(grepl("p$",
+ group), "plus", "minus")) %>% mutate(type = gsub("p$", "", group))
> rownames(metadata) = metadata$name
> metadata = metadata[colnames(counts), ]
> metadata donor group name treat type
d1_nt donor1 nt d1_nt minus nt
d1_sh17 donor1 sh17 d1_sh17 minus sh17
d1_sh16 donor1 sh16 d1_sh16 minus sh16
d1_ntp donor1 ntp d1_ntp plus nt
d1_sh17p donor1 sh17p d1_sh17p plus sh17
d1_sh16p donor1 sh16p d1_sh16p plus sh16
d2_nt donor2 nt d2_nt minus nt
d2_sh17 donor2 sh17 d2_sh17 minus sh17
d2_sh16 donor2 sh16 d2_sh16 minus sh16
d2_ntp donor2 ntp d2_ntp plus nt
d2_sh17p donor2 sh17p d2_sh17p plus sh17
d2_sh16p donor2 sh16p d2_sh16p plus sh16
> counts = counts[rowSums(counts[, 1:6] > 0) > 2 & rowSums(counts[, 6:12] > 0) >
+ 2, ]
>
> log2counts = log2(counts + 1)
>
> ggplot(melt(log2counts), aes(x = value, color = variable)) + geom_density() +
+ theme_bw()> mds(log2counts, k = 2, condition = metadata$donor) + theme_bw() ## Differential analysis with DESeq2
Just using DESeq2 without any further addition to the analysis
> dds = DESeqDataSetFromMatrix(counts, metadata, design = ~donor + treat + type)
> # rdds = rlog(dds)
> dds = DESeq(dds)
> DESeq2::plotDispEsts(dds)> sf <- sizeFactors(dds)
> disp <- dispersions(dds)
> dds_group = DESeqDataSetFromMatrix(counts, metadata, design = ~donor + group)
> sizeFactors(dds_group) <- sf
> dispersions(dds_group) <- disp
> dds_group <- nbinomWaldTest(dds_group)How the gene SET looks like:
> res_sh16_vs_control = results(dds_group, list("groupsh16", "groupnt"))
> suppressWarnings(DEGreport::degPlot(dds_group, res_sh16_vs_control["SET", ,
+ drop = FALSE], n = 1, xs = "treat", group = "type", batch = "donor"))> res_nt_vs_plus = results(dds_group, list("groupntp", "groupnt"))
> DESeq2::plotMA(res_nt_vs_plus)> res = res_nt_vs_plus %>% as.data.frame() %>% tibble::rownames_to_column("id") %>%
+ arrange(padj) %>% tibble::column_to_rownames("id")
>
> res %>% head(15) %>% knitr::kable()| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| CYBB | 2900830.1 | 1.2927340 | 0.1527038 | 8.465631 | 0e+00 | 0.00e+00 |
| AKR1B1 | 11388704.5 | -0.8760231 | 0.1266923 | -6.914573 | 0e+00 | 0.00e+00 |
| DYSF | 20773280.4 | 1.3837569 | 0.2035469 | 6.798222 | 0e+00 | 0.00e+00 |
| ACSL1 | 16567561.0 | 1.0930494 | 0.1643207 | 6.651929 | 0e+00 | 1.00e-07 |
| CYBA | 1776465.4 | 1.1292498 | 0.1735012 | 6.508601 | 0e+00 | 1.00e-07 |
| AKAP13 | 9230549.6 | 0.4631490 | 0.0736832 | 6.285681 | 0e+00 | 4.00e-07 |
| DHRS9 | 825946.3 | 1.0590918 | 0.1797898 | 5.890722 | 0e+00 | 3.60e-06 |
| RAB43 | 1060144.4 | 0.6151003 | 0.1040928 | 5.909156 | 0e+00 | 3.60e-06 |
| MLKL | 1774305.4 | 0.6338305 | 0.1080660 | 5.865217 | 0e+00 | 3.80e-06 |
| GRAMD1A | 249876.0 | 0.7678239 | 0.1340367 | 5.728459 | 0e+00 | 7.70e-06 |
| CEP170 | 7763560.4 | 0.3140712 | 0.0552059 | 5.689086 | 0e+00 | 8.80e-06 |
| UTRN | 17584830.4 | 0.7712507 | 0.1393132 | 5.536092 | 0e+00 | 1.95e-05 |
| MMP8 | 374974.2 | 0.9100422 | 0.1689469 | 5.386556 | 1e-07 | 4.18e-05 |
| CHMP2A | 4830981.4 | 0.4540989 | 0.0846320 | 5.365572 | 1e-07 | 4.36e-05 |
| FGR | 2195862.8 | 0.9091360 | 0.1704786 | 5.332846 | 1e-07 | 4.87e-05 |
> volcano_density_plot(res[, c("log2FoldChange", "pvalue")], shade.colour = "orange")> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type",
+ batch = "donor"))> deseq2_ntplus_effect = res> res_sh16_vs_plus = results(dds_group, list("groupsh16p", "groupsh16"))
> DESeq2::plotMA(res_sh16_vs_plus)> res = res_sh16_vs_plus %>% as.data.frame() %>% tibble::rownames_to_column("id") %>%
+ arrange(padj) %>% tibble::column_to_rownames("id")
>
> res %>% head(15) %>% knitr::kable()| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| ACSL1 | 16567561.0 | 1.2779508 | 0.1643207 | 7.777172 | 0e+00 | 0.0000000 |
| CYBB | 2900830.1 | 1.1087726 | 0.1527041 | 7.260924 | 0e+00 | 0.0000000 |
| DHRS9 | 825946.3 | 1.1875589 | 0.1797906 | 6.605232 | 0e+00 | 0.0000001 |
| DYSF | 20773280.4 | 1.3021073 | 0.2035469 | 6.397086 | 0e+00 | 0.0000003 |
| UTRN | 17584830.4 | 0.8748320 | 0.1393133 | 6.279600 | 0e+00 | 0.0000005 |
| EFHD2 | 19151142.9 | 0.4732838 | 0.0791501 | 5.979574 | 0e+00 | 0.0000028 |
| DOK3 | 4842773.4 | 0.5519350 | 0.0941608 | 5.861625 | 0e+00 | 0.0000050 |
| AKAP13 | 9230549.6 | 0.4194689 | 0.0736838 | 5.692827 | 0e+00 | 0.0000105 |
| RBPJ | 1246748.9 | 0.5915706 | 0.1039267 | 5.692191 | 0e+00 | 0.0000105 |
| SLK | 14105526.4 | 0.3047064 | 0.0550687 | 5.533204 | 0e+00 | 0.0000238 |
| GRAMD1A | 249876.0 | 0.7319624 | 0.1340475 | 5.460473 | 0e+00 | 0.0000299 |
| HIP1 | 12592345.6 | 0.7236909 | 0.1324214 | 5.465058 | 0e+00 | 0.0000299 |
| HCK | 6270652.0 | 0.7460440 | 0.1423116 | 5.242327 | 2e-07 | 0.0000922 |
| AKR1B1 | 11388704.5 | -0.6597371 | 0.1266926 | -5.207387 | 2e-07 | 0.0001034 |
| SMPDL3A | 349707.7 | 1.0411864 | 0.2029895 | 5.129263 | 3e-07 | 0.0001466 |
> volcano_density_plot(res[, c("log2FoldChange", "pvalue")], shade.colour = "orange")> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type",
+ batch = "donor"))> deseq2_sh16plus_effect = res> res_sh17_vs_plus = results(dds_group, list("groupsh17p", "groupsh17"))
> DESeq2::plotMA(res_sh17_vs_plus)> res = res_sh17_vs_plus %>% as.data.frame() %>% tibble::rownames_to_column("id") %>%
+ arrange(padj) %>% tibble::column_to_rownames("id")
>
> res %>% head(15) %>% knitr::kable()| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| CYBB | 2900830.1 | 1.1552149 | 0.1527042 | 7.565051 | 0.00e+00 | 0.0000000 |
| ACSL1 | 16567561.0 | 0.9316196 | 0.1643207 | 5.669519 | 0.00e+00 | 0.0000229 |
| DYSF | 20773280.4 | 1.1549701 | 0.2035469 | 5.674221 | 0.00e+00 | 0.0000229 |
| EFHD2 | 19151142.9 | 0.4533775 | 0.0791500 | 5.728076 | 0.00e+00 | 0.0000229 |
| HEMGN | 266539.7 | -0.7048192 | 0.1248420 | -5.645692 | 0.00e+00 | 0.0000229 |
| AKR1B1 | 11388704.5 | -0.6950905 | 0.1266924 | -5.486443 | 0.00e+00 | 0.0000477 |
| MMP8 | 374974.2 | 0.9127065 | 0.1689500 | 5.402227 | 1.00e-07 | 0.0000656 |
| UTRN | 17584830.4 | 0.7242910 | 0.1393133 | 5.199009 | 2.00e-07 | 0.0001746 |
| HCK | 6270652.0 | 0.7336007 | 0.1423116 | 5.154891 | 3.00e-07 | 0.0001966 |
| CYBA | 1776465.4 | 0.8769022 | 0.1735016 | 5.054146 | 4.00e-07 | 0.0003015 |
| SEMA6B | 302314.0 | 0.5819162 | 0.1243450 | 4.679853 | 2.90e-06 | 0.0018198 |
| CD109 | 1324552.9 | -0.6074466 | 0.1333961 | -4.553707 | 5.30e-06 | 0.0028272 |
| TYROBP | 2200631.4 | 0.5676309 | 0.1244887 | 4.559696 | 5.10e-06 | 0.0028272 |
| CEP170 | 7763560.4 | 0.2428626 | 0.0552063 | 4.399184 | 1.09e-05 | 0.0054120 |
| DHRS9 | 825946.3 | 0.7827607 | 0.1797907 | 4.353733 | 1.34e-05 | 0.0054898 |
> volcano_density_plot(res[, c("log2FoldChange", "pvalue")], shade.colour = "orange")> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type",
+ batch = "donor"))> deseq2_sh17plus_effect = resThe idea is to use the DE genes from nt vs nt+ and remove the ones that changed due to sh6 vs sh16+ or sh17 vs sh17+.
We used the FDR < 0.05 to get the DE and pvalue>0.2 to get the non-changed genes.
> deseq2_sh_dependent = inner_join(deseq2_ntplus_effect %>% tibble::rownames_to_column("id") %>%
+ filter(padj < 0.05) %>% select(id, nt_padj = padj), inner_join(deseq2_sh16plus_effect %>%
+ tibble::rownames_to_column("id") %>% filter(pvalue > 0.2) %>% select(id,
+ sh16_padj = pvalue), deseq2_sh17plus_effect %>% tibble::rownames_to_column("id") %>%
+ filter(pvalue > 0.2) %>% select(id, sh17_padj = pvalue), by = "id"), by = "id") %>%
+ rowwise() %>% mutate(sh_max = max(sh16_padj, sh17_padj)) %>% mutate(diff = nt_padj -
+ sh_max) %>% arrange((nt_padj))
>
> suppressWarnings(DEGreport::degPlot(dds_group, deseq2_ntplus_effect[deseq2_sh_dependent$id,
+ ], n = 9, xs = "treat", group = "type", batch = "donor"))> d = model.matrix(~0 + donor + treat, metadata)
> y = normalizeBetweenArrays(log2(counts), method = "quantile")
>
> limma_ma = voomaByGroup(y, group = metadata$type, d, plot = FALSE)
>
> limmaPlot = function(counts, genes, metadata, xs = "time", group = "condition",
+ batch = NULL) {
+ pp = lapply(genes, function(gene) {
+ dd = data.frame(count = counts[gene, ], time = metadata[, xs])
+ if (is.null(group)) {
+ dd$treatment = "one_group"
+ } else {
+ dd$treatment = metadata[row.names(dd), group]
+ }
+ if (!is.null(batch)) {
+ dd$batch = metadata[row.names(dd), batch]
+ p = ggplot(dd, aes(x = time, y = count, color = batch, shape = treatment))
+ } else {
+ p = ggplot(dd, aes(x = time, y = count, color = treatment, shape = treatment))
+ }
+ p = p + stat_smooth(aes(x = time, y = count, group = treatment, color = treatment),
+ fill = "grey80") + geom_jitter(size = 1, alpha = 0.7, height = 0,
+ width = 0.2) + theme_bw(base_size = 7) + ggtitle(gene)
+ if (length(unique(dd$treatment)) == 1) {
+ p = p + scale_color_brewer(guide = FALSE, palette = "Set1") + scale_fill_brewer(guide = FALSE,
+ palette = "Set1")
+ }
+ p
+ })
+ n = ceiling(length(pp))
+ do.call(grid.arrange, pp)
+ }> library(limma)
> keep = grepl("nt", row.names(metadata))
> d = model.matrix(~0 + donor + treat, metadata[keep, ])
> y = normalizeBetweenArrays(log2(counts[, keep]), method = "quantile")
>
> v = voomaByGroup(y, group = metadata$type[keep], d, plot = TRUE)> f = lmFit(v, d)
> fb = eBayes(f)
>
>
> res = topTable(fb, coef = "treatplus", sort.by = "p", number = Inf)
>
> knitr::kable(head(res, 15))| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| CXCL8 | 2.581916 | 19.80694 | 9.767088 | 0.0000092 | 0.0344386 | 3.6667038 |
| SLPI | 1.890419 | 22.79432 | 9.091814 | 0.0000157 | 0.0344386 | 3.4500397 |
| S100A12 | 3.169849 | 23.15430 | 9.039778 | 0.0000164 | 0.0344386 | 3.3942174 |
| C14orf93 | 3.086437 | 18.35416 | 8.596161 | 0.0000238 | 0.0344386 | 2.7013356 |
| OSR2 | 3.342374 | 15.58042 | 8.542010 | 0.0000250 | 0.0344386 | 1.8329605 |
| CSF3 | 4.465571 | 18.46044 | 8.437917 | 0.0000273 | 0.0344386 | 2.5017388 |
| PCDHB2 | 1.893239 | 20.03219 | 7.839069 | 0.0000468 | 0.0492400 | 2.4330637 |
| KLHL35 | 3.440221 | 15.72689 | 7.673301 | 0.0000547 | 0.0492400 | 2.0309624 |
| CD177 | 2.529608 | 19.80707 | 7.587720 | 0.0000593 | 0.0492400 | 2.1975587 |
| S100A8 | 3.021722 | 26.91976 | 7.488745 | 0.0000651 | 0.0492400 | 2.1333866 |
| S100A9 | 2.390447 | 26.73337 | 7.249860 | 0.0000821 | 0.0564570 | 1.9374803 |
| NCAM1 | 1.847449 | 18.76559 | 7.071941 | 0.0000980 | 0.0590235 | 1.7140009 |
| DEFA3 | 4.510520 | 15.14801 | 6.986526 | 0.0001068 | 0.0590235 | 0.6898536 |
| TOPAZ1 | 2.491827 | 17.03690 | 6.963874 | 0.0001093 | 0.0590235 | 1.3456436 |
| DYSF | 1.804080 | 24.39469 | 6.709349 | 0.0001420 | 0.0715635 | 1.5057166 |
> volcano_density_plot(res[, c("logFC", "P.Value")], shade.colour = "orange")> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type",
+ batch = "donor"))> ntplus_effect = res> library(limma)
> keep = !grepl("nt", row.names(metadata))
> d = model.matrix(~0 + donor + treat + type, metadata[keep, ])
> y = normalizeBetweenArrays(log2(counts[, keep]), method = "quantile")
>
> v = voomaByGroup(y, group = metadata$type[keep], d, plot = TRUE)> f = lmFit(v, d)
> fb = eBayes(f)
>
> res = topTable(fb, coef = "treatplus", sort.by = "p", number = Inf)
>
> knitr::kable(head(res, 15))| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| RETN | 1.5377891 | 20.66102 | 23.35461 | 1.00e-07 | 0.0006611 | 7.588722 |
| S100A9 | 2.4332080 | 26.43037 | 21.01202 | 2.00e-07 | 0.0006758 | 6.936341 |
| CYBA | 1.0056334 | 20.53080 | 17.32453 | 7.00e-07 | 0.0016527 | 6.296766 |
| FCAR | 1.3789761 | 21.01399 | 16.57269 | 9.00e-07 | 0.0016692 | 6.150770 |
| CYBB | 1.2631403 | 21.18817 | 15.49117 | 1.40e-06 | 0.0020974 | 5.834846 |
| CD177 | 2.6267508 | 19.20314 | 14.66743 | 2.00e-06 | 0.0025165 | 5.263359 |
| SULT1B1 | 1.0076530 | 19.38423 | 12.61322 | 5.40e-06 | 0.0044148 | 4.563903 |
| ACSL1 | 1.1690543 | 23.65905 | 12.60906 | 5.40e-06 | 0.0044148 | 4.768172 |
| CES1 | 0.5766228 | 20.75964 | 12.49156 | 5.80e-06 | 0.0044148 | 4.637054 |
| ITGAM | 0.7883593 | 22.90557 | 12.17975 | 6.80e-06 | 0.0044148 | 4.566317 |
| NAMPT | 0.7974357 | 24.81606 | 12.17556 | 6.90e-06 | 0.0044148 | 4.524472 |
| MCEMP1 | 1.4549909 | 19.34002 | 12.13479 | 7.00e-06 | 0.0044148 | 4.348670 |
| HLA-DQA1 | -0.9592994 | 19.89827 | -11.19283 | 1.19e-05 | 0.0066671 | 3.953712 |
| HIP1 | 0.6582028 | 23.38038 | 11.13031 | 1.23e-05 | 0.0066671 | 4.017406 |
| ACPP | 0.6758494 | 19.61586 | 10.86617 | 1.44e-05 | 0.0069528 | 3.770140 |
> volcano_density_plot(res[, c("logFC", "P.Value")], shade.colour = "orange")> suppressWarnings(DEGreport::degPlot(dds_group, res, n = 9, xs = "treat", group = "type",
+ batch = "donor"))> shplus_effect = res> sh_dependent = inner_join(ntplus_effect %>% tibble::rownames_to_column("id") %>%
+ filter(adj.P.Val < 0.1), shplus_effect %>% tibble::rownames_to_column("id") %>%
+ filter(adj.P.Val > 0.2), by = "id")
>
> suppressWarnings(DEGreport::degPlot(dds_group, ntplus_effect[sh_dependent$id,
+ ], n = 8, xs = "treat", group = "type", batch = "donor"))> # suppressWarnings(limmaPlot(limma_ma$E, sh_dependent$id, metadata, xs =
> # 'treat', group = 'type', batch = 'donor' ))It doesn’t help a lot this time. I guess too few replicates to remove noise from any of them.
> library(RUVSeq)
> ruv = RUVs(counts(dds_group), cIdx = row.names(counts), k = 1, scIdx = matrix(c(1:6,
+ 7:12), nrow = 6))
> mds(ruv$normalizedCounts, k = 2, metadata$group) + ggtitle("1 factor")> ruv = RUVs(as.matrix(counts), cIdx = row.names(counts), k = 2, scIdx = matrix(c(1:6,
+ 7:12), nrow = 6))
> mds(ruv$normalizedCounts, k = 2, metadata$group) + ggtitle("2 factors")> ruv = RUVs(as.matrix(counts), cIdx = row.names(counts), k = 3, scIdx = matrix(c(1:6,
+ 7:12), nrow = 6))
> mds(ruv$normalizedCounts, k = 2, metadata$group) + ggtitle("3 factors")